package test.webmagic;

import java.util.List;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

/**
 * Experimental WebMagic page processor that dumps diagnostic information about
 * every downloaded page to standard output: the page URL, the full HTML, and the
 * text of all nodes matched by the {@code //caption/text()} XPath expression.
 *
 * <p>The {@link #main(String[])} entry point wires this processor to a
 * {@link SeleniumDownloader} and crawls a single start URL.
 */
public class GithubRepoPageProcessor2 implements PageProcessor {

    /** XPath used to pull the caption text nodes out of the page. */
    private static final String CAPTION_TEXT_XPATH = "//caption/text()";

    /** System property key handed to the Selenium integration together with the config file path. */
    // NOTE(review): the key spelling is reproduced exactly as originally written;
    // confirm against the webmagic-selenium sources before "correcting" it.
    private static final String SELENIUM_CONFIG_PROPERTY = "selenuim_config";

    /** Machine-specific location of the Selenium configuration file. */
    private static final String SELENIUM_CONFIG_FILE =
            "C:\\Atos\\Mike\\GitRepos\\Mitdy-SportGame\\Mitdy-SportGame\\src\\main\\resources\\config.ini";

    /** Machine-specific location of the ChromeDriver executable passed to the downloader. */
    private static final String CHROME_DRIVER_PATH = "C:\\Atos\\Mike\\ChromeDriver\\chromedriver.exe";

    /** The only URL the crawl is seeded with. */
    private static final String START_URL = "http://web1.sa8888.net/?lang=2&ball=bk";

    /** Number of worker threads requested from the spider. */
    private static final int THREAD_COUNT = 2;

    /** Crawl settings for the target site: a retry count of 3 and a sleep time of 3000. */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(3000);

    /**
     * Extraction callback invoked for each downloaded page. It only prints
     * diagnostics; nothing is stored on the page and no follow-up requests are queued.
     *
     * @param page the downloaded page to inspect
     */
    @Override
    public void process(Page page) {
        System.out.println("page: " + page.getUrl());
        System.out.println("page: " + page.getHtml().get());

        List<String> captionTexts = page.getHtml().xpath(CAPTION_TEXT_XPATH).all();
        System.out.println("matchesElements：" + captionTexts.size());
        System.out.println(captionTexts);

        // Link following is currently disabled:
//        page.addTargetRequests(page.getHtml().links().regex("(https://www\\.primefaces\\.org/showcase/[\\w\\-]+/[\\w\\-]+)").all());
    }

    /**
     * Returns the crawl settings used by this processor.
     *
     * @return the {@link Site} configuration built when this instance was created
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Sets the Selenium configuration property, then starts a crawl of
     * {@code START_URL} using a {@link SeleniumDownloader} and two threads.
     * The call to {@code run()} executes the crawl on the calling thread.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // TLS troubleshooting switches, left disabled:
//        System.setProperty("javax.net.debug", "all");
//        System.setProperty("https.protocols", "TLSv1,TLSv1.1,TLSv1.2");
        System.setProperty(SELENIUM_CONFIG_PROPERTY, SELENIUM_CONFIG_FILE);

        Spider crawler = Spider.create(new GithubRepoPageProcessor2())
                .addUrl(START_URL)
                .setDownloader(new SeleniumDownloader(CHROME_DRIVER_PATH))
                .thread(THREAD_COUNT);

        // Start the crawl.
        crawler.run();
    }
}